# -*- coding: utf-8 -*-
# @Time    : 2024/1/7 21:48
# @Author  : micah
# @File    : 6.xpath练习.py
# @Software: PyCharm


from lxml import etree

# Sample HTML fragment used as the input for the XPath queries below.
# Note that the fifth <li> has no closing tag; lxml's HTML parser is
# lenient and is expected to recover from this when building the tree.
text = ''' <div> <ul> 
        <li class="item-1"><a href="link1.html">first item</a></li> 
        <li class="item-1"><a href="link2.html">second item</a></li> 
        <li class="item-inactive"><a href="link3.html">third item</a></li> 
        <li class="item-1"><a href="link4.html">fourth item</a></li> 
        <li class="item-0"><a href="link5.html">fifth item</a> 
        </ul> </div> '''


# Parse the fragment into an element tree that can be queried with XPath.
tree = etree.HTML(text)

# All href attributes of <a> elements that are direct children of an
# <li class="item-1">, collected document-wide into one flat list.
href_list = tree.xpath('//li[@class="item-1"]/a/@href')
print(href_list)

# All text nodes located anywhere beneath an <li class="item-1">.
content_list = tree.xpath('//li[@class="item-1"]//text()')
print(content_list)

# Build one record per matching <li> by querying *relative to that node*.
# Zipping the two flat lists above is only correct when every <li> yields
# exactly one href and exactly one text node; a missing link or an extra
# whitespace text node would silently pair a label with the wrong link.
for li in tree.xpath('//li[@class="item-1"]'):
    hrefs = li.xpath('./a/@href')
    if not hrefs:
        # No link in this <li>: there is nothing meaningful to pair.
        continue

    href = hrefs[0]
    # Join every text node under this <li> so nested markup or surrounding
    # whitespace cannot split one label into several entries.
    content = ''.join(li.xpath('.//text()')).strip()

    item = {'content': content, 'href': href}
    print(item)


